Setup

Load R libraries

Check data

Read in TCR data

data_dir = "../../data/Zheng_2021/"

tcr_beta = fread("../../data/Zheng_2021_processed/TCR_beta.txt.gz")
dim(tcr_beta)
## [1] 143559     18
tcr_beta[1:2,]
##                barcode is_cell high_confidence length chain   v_gene d_gene
## 1: AAACCTGAGAAACCGC.60    TRUE            TRUE    706   TRB TRBV20-1  TRBD1
## 2:  AAACCTGAGAACTGTA.2    TRUE            TRUE    655   TRB   TRBV30  TRBD2
##     j_gene c_gene full_length productive             cdr3
## 1: TRBJ2-7  TRBC2        TRUE       TRUE CSASIGGGLRLGEQYF
## 2: TRBJ1-1  TRBC1        TRUE       TRUE   CAWSPGTVNTEAFF
##                                             cdr3_nt reads umis       library.id
## 1: TGCAGTGCTAGCATAGGGGGGGGTTTAAGGCTGGGCGAGCAGTACTTC  9872   18 THCA-P20190122-N
## 2:       TGTGCCTGGAGTCCCGGGACGGTGAACACTGAAGCTTTCTTT 25215   29   BC-P20190403-T
##    cdr3_length                                  key
## 1:          16 THCA-P20190122-N:AAACCTGAGAAACCGC.60
## 2:          14    BC-P20190403-T:AAACCTGAGAACTGTA.2

Use gene expression data to select tumor reactive T cells

Read in gene expression signaatures

Hanada_2022

These are lung cancer specific signatures.

CD4_Hanada = scan(file="../../data/Hanada_2022/CD4_signature.txt", 
                  what=character(0))
CD4_Hanada
##  [1] "CXCL13+" "NR3C1+"  "ADGRG1+" "NMG+"    "ITM2A+"  "ETV7+"   "COTL1+" 
##  [8] "B2M+"    "IGFL2+"  "VIM-"    "MT-ND1-" "MALAT1-" "EMP3-"
Hanada = list(CD4=gsub("(\\+|-)$", "", CD4_Hanada),
              CD4_sign=str_extract(CD4_Hanada, pattern="(\\+|-)$"))
Hanada
## $CD4
##  [1] "CXCL13" "NR3C1"  "ADGRG1" "NMG"    "ITM2A"  "ETV7"   "COTL1"  "B2M"   
##  [9] "IGFL2"  "VIM"    "MT-ND1" "MALAT1" "EMP3"  
## 
## $CD4_sign
##  [1] "+" "+" "+" "+" "+" "+" "+" "+" "+" "-" "-" "-" "-"

Lowery_2022

Lowery et al. (2022) not only generated their own gene signatures, but also compiled a long list of signatures from other studies. Here we first check their own signatures.

clean_list <- function(x){
  for(i in 1:length(x)){
    x[[i]] = as.character(na.omit(x[[i]]))
  }
  x
}

sig_Lowery = read_excel("../../data/Lowery_2022/science.abl5447_table_s4.xlsx",
                  sheet="Signatures from this study")
dim(sig_Lowery)
## [1] 243  14
sig_Lowery[1:2,]
## # A tibble: 2 × 14
##   `Cluster 0` `Cluster 1` `Cluster 2` `Cluster 3` `Cluster 4` `Cluster 5`
##   <chr>       <chr>       <chr>       <chr>       <chr>       <chr>      
## 1 RPS4Y1      CXCL13      LGALS1      CREM        ZNF683      EGR1       
## 2 TUBA1B      TIGIT       ANXA1       SNHG12      XCL2        DUSP2      
## # … with 8 more variables: Cluster 6 <chr>, Cluster 7 <chr>, Cluster 8 <chr>,
## #   Cluster 9 <chr>, Cluster 10 <chr>, Cluster 11 <chr>, NeoTCR4 <chr>,
## #   NeoTCR8 <chr>
Lowery = list(CD4 = sig_Lowery$NeoTCR4)
Lowery = clean_list(Lowery)
sapply(Lowery, length)
## CD4 
##  40
NeoT4 = read_excel("../../data/Lowery_2022/science.abl5447_table_s8.xlsx",
                   sheet="NeoTCR4")
dim(NeoT4)
## [1] 111   7
NeoT4[1:2,]
## # A tibble: 2 × 7
##   Gene       p_val avg_logFC pct.1 pct.2 p_val_adj cluster   
##   <chr>      <dbl>     <dbl> <dbl> <dbl>     <dbl> <chr>     
## 1 CXCL13 6.13e-137     1.13  0.706 0.182 8.87e-133 CD4_NeoTCR
## 2 HMOX1  2.17e- 57     0.877 0.288 0.065 3.14e- 53 CD4_NeoTCR
table(NeoT4$avg_logFC < 0, NeoT4$p_val_adj < 0.05)
##        
##         FALSE TRUE
##   FALSE     1   61
##   TRUE     12   37
table(Lowery$CD4 %in% NeoT4$Gene)
## 
## TRUE 
##   40
wmat = match(Lowery$CD4, NeoT4$Gene)
max(NeoT4$p_val_adj[wmat])
## [1] 0.02022774
w4 = NeoT4$avg_logFC < 0 & NeoT4$p_val_adj < 0.05

Lowery_negative = list(CD4 = NeoT4$Gene[w4])
sapply(Lowery_negative, length)
## CD4 
##  37

Next check the signatures from other studies

sigs = read_excel("../../data/Lowery_2022/science.abl5447_table_s4.xlsx",
                  sheet="Published signatures")
dim(sigs)
## [1] 108 107
sigs[1:2,]
## # A tibble: 2 × 107
##   Krishna.ACT.Stem.Like Krishna.ACT.Term.Di… Li.CD8.DYS TOX_Scott Wu.8EFF Wu.8EM
##   <chr>                 <chr>                <chr>      <chr>     <chr>   <chr> 
## 1 KLF2                  ENTPD1               LAG3       IGKC      GNLY    GZMK  
## 2 RASA3                 CD69                 HAVCR2     AVIL      NKG7    CCL4  
## # … with 101 more variables: Wu.8Trm.1 <chr>, Wu.8Trm.2 <chr>, Wu.8Trm.3 <chr>,
## #   Wu.8chrom <chr>, Wu.8Mit <chr>, Wu.8KLRB1 <chr>, Oh.CD8.CD39 <chr>,
## #   Oh.CD8.Naive <chr>, Oh.CD8.HSP <chr>, Oh.CD8.MAIT <chr>,
## #   Oh.CD8.FGFBP2 <chr>, Oh.CD8.RPL <chr>, Oh.CD8.MITO <chr>, Oh.CD8.XCL <chr>,
## #   Oh.CD8.CM <chr>, Oh.CD8.PRO <chr>, Mel_Exhaust_Tirosh <chr>,
## #   CD8_G_Feldman <chr>, CD8_B_Feldman <chr>, Exhaust_1_Feldman <chr>,
## #   Exhaust_2_Feldman <chr>, Exhaust_3_Feldman <chr>, …
aucs = read_excel("../../data/Lowery_2022/science.abl5447_table_s12.xlsx")
dim(aucs)
## [1] 108   6
aucs[1:2,]
## # A tibble: 2 × 6
##   `Gene Signatures`            `CD4 training` `CD8 training` `CD4 validation`
##   <chr>                                 <dbl>          <dbl>            <dbl>
## 1 B16_PROG.EX_Miller..40g.              0.404          0.345            0.384
## 2 B16_TERMINAL.EX_Miller..27g.          0.486          0.75             0.484
## # … with 2 more variables: CD8 validation <dbl>, Public viral TCRs <dbl>
order.train = order(aucs$`CD4 training`, decreasing=TRUE)[1:5]
order.valid = order(aucs$`CD4 validation`, decreasing=TRUE)[1:5]
aucs[order.train,]
## # A tibble: 5 × 6
##   `Gene Signatures`       `CD4 training` `CD8 training` `CD4 validation`
##   <chr>                            <dbl>          <dbl>            <dbl>
## 1 NeoTCR4.40..40g.                 0.902          0.802            0.842
## 2 Jansen_Term.diff..73g.           0.866          0.784            0.813
## 3 Caushi.CD4.Tfh.2...66g.          0.844          0.74             0.819
## 4 Cluster.1..50g.                  0.832          0.561            0.743
## 5 Oh.CD4.CXCL13..49g.              0.827          0.725            0.824
## # … with 2 more variables: CD8 validation <dbl>, Public viral TCRs <dbl>
aucs[order.valid,]
## # A tibble: 5 × 6
##   `Gene Signatures`       `CD4 training` `CD8 training` `CD4 validation`
##   <chr>                            <dbl>          <dbl>            <dbl>
## 1 NeoTCR4.40..40g.                 0.902          0.802            0.842
## 2 Oh.CD4.CXCL13..49g.              0.827          0.725            0.824
## 3 Caushi.CD4.Tfh.2...66g.          0.844          0.74             0.819
## 4 Jansen_Term.diff..73g.           0.866          0.784            0.813
## 5 CD8_B_Feldman..50g.              0.736          0.755            0.773
## # … with 2 more variables: CD8 validation <dbl>, Public viral TCRs <dbl>
CD4.sigs = intersect(aucs$`Gene Signatures`[order.train], 
                     aucs$`Gene Signatures`[order.valid])
CD4.sigs
## [1] "NeoTCR4.40..40g."        "Jansen_Term.diff..73g." 
## [3] "Caushi.CD4.Tfh.2...66g." "Oh.CD4.CXCL13..49g."
sapply(sigs[grep("Oh.CD4", names(sigs))], function(x){length(na.omit(x))})
##       Oh.CD4.CM   Oh.CD4IL2RAHI   Oh.CD4IL2RALO Oh.CD4.Activate     Oh.CD4.GZMB 
##              50              50              50              50              50 
##     Oh.CD4.GZMK     Oh.CD4.TH17   Oh.CD4.CXCL13     Oh.CD4.MITO      Oh.CD4.HSP 
##              50              50              50              14              50 
##   Oh.CD4.PROLIF 
##              50
sigs_CD4 = list(Lowery.CD4.neg.37g = Lowery_negative[["CD4"]], 
                Jansen_Term.diff.73g = sigs[["Jansen_Term diff"]],
                Caushi.CD4.Tfh.2.66g = sigs[["Caushi.CD4-Tfh(2)"]],
                Oh.CD4.CXCL13.50g    = sigs[["Oh.CD4.CXCL13"]],
                Lowery.CD4.40g = Lowery$CD4)
sigs_CD4 = clean_list(sigs_CD4)
sapply(sigs_CD4, length)
##   Lowery.CD4.neg.37g Jansen_Term.diff.73g Caushi.CD4.Tfh.2.66g 
##                   37                   73                   66 
##    Oh.CD4.CXCL13.50g       Lowery.CD4.40g 
##                   50                   40

Process gene expression of CD4 T cells

cd4T_files = list.files(data_dir, pattern="10X.CD4")
cd4T_files
## [1] "GSE156728_BC_10X.CD4.counts.txt.gz"  
## [2] "GSE156728_BCL_10X.CD4.counts.txt.gz" 
## [3] "GSE156728_ESCA_10X.CD4.counts.txt.gz"
## [4] "GSE156728_MM_10X.CD4.counts.txt.gz"  
## [5] "GSE156728_PACA_10X.CD4.counts.txt.gz"
## [6] "GSE156728_RC_10X.CD4.counts.txt.gz"  
## [7] "GSE156728_THCA_10X.CD4.counts.txt.gz"
## [8] "GSE156728_UCEC_10X.CD4.counts.txt.gz"
for(f1 in cd4T_files){
  d1 = fread(file.path(data_dir, f1))
  stopifnot(anyDuplicated(names(d1)) == 0)

  print(sprintf("file: %s, dimension: (%d,%d)", f1, nrow(d1), ncol(d1)))

  Tcells = which(names(d1) %in% tcr_beta$barcode)
  dT = data.matrix(d1[,..Tcells])
  rownames(dT) = d1$V1
  
  print(sprintf("%d/%d cells with TCR", length(Tcells), ncol(d1)))

  gs = list()
  
  for(s1 in names(sigs_CD4)){
    match.gene = na.omit(match(sigs_CD4[[s1]], d1$V1))
    print(sprintf("%s: %d/%d genes are found.",  
                  s1, length(match.gene), length(sigs_CD4[[s1]])))
    
    gs[[s1]] = d1$V1[match.gene]
  }
  
  sce = SingleCellExperiment(assays=list(counts=dT))
  sce = logNormCounts(sce)
  sce

  gsva.es = gsva(counts(sce), gs, method="ssgsea", verbose=FALSE)

  stopifnot(all(colnames(gsva.es) == colnames(sce)))
  
  # some signature genes appear multiple times
  sig_genes = unlist(gs[-1])
  print("the frequency of sigature genes:")
  print(table(table(sig_genes)))
  
  mat1 = match(sig_genes, rownames(sce))
  stopifnot(all(! is.na(mat1)))
  
  mat2 = match(Hanada$CD4[which(Hanada$CD4_sign == "+")], rownames(sce))
  mat2 = na.omit(mat2)
  
  mat3 = match(Hanada$CD4[which(Hanada$CD4_sign == "-")], rownames(sce))
  mat3 = na.omit(mat3)

  print(sprintf("%d/%d genes are found for Hanada postive/negative signatures",  
                  length(mat2), length(mat3)))

  ave_combined = colMeans(logcounts(sce)[mat1,])
  ave_Hanada = colMeans(logcounts(sce)[mat2,])
  ave_Hanada_neg = colMeans(logcounts(sce)[mat3,])

  sigs = data.frame(ave_Hanada_neg, t(gsva.es)[,1], 
                    ave_Hanada, ave_combined, t(gsva.es)[,-1])
  names(sigs)[2] = rownames(gsva.es)[1]
  dim(sigs)
  sigs[1:2,]
  sigs = scale(sigs)
  
  gc = ggcorrplot(cor(sigs), lab = TRUE)
  print(gc)
  
  unq_genes = unique(c(unlist(gs), rownames(sce)[mat2], rownames(sce)[mat3]))
  
  print("number of genes used for PCA:")
  print(length(unq_genes))
  
  stopifnot(all(unq_genes %in% d1$V1))
  match.gene = match(unq_genes, d1$V1)
  
  sce = runPCA(sce, ncomponents=20, subset_row=match.gene)
  set.seed(100100)
  sce = runUMAP(sce, dimred="PCA")
  
  clust = clusterCells(sce, use.dimred="PCA", 
                       BLUSPARAM=NNGraphParam(cluster.fun="louvain"))
  colData(sce)$louvain = clust

  default_palette = scales::hue_pal()(length(unique(clust)))

  g1 = plotUMAP(sce, colour_by=I(colData(sce)$louvain), 
                    point_size=0.6) + 
  guides(color = guide_legend(override.aes = list(size = 2))) + 
  scale_color_manual(values = default_palette)
  print(g1)

  colData(sce)[["ssGSEA"]]   = colMeans(gsva.es[-1,])
  colData(sce)[["positive"]] = rowMeans(sigs[,-(1:2)])
  colData(sce)[["negative"]] = rowMeans(sigs[,(1:2)])

  colData(sce) = cbind(colData(sce), reducedDim(sce, "UMAP"))
  
  df = as.data.frame(colData(sce))
  stopifnot(ncol(df) == 7)
  names(df)[6:7] = c("UMAP1", "UMAP2")

  g2 = ggplot(df, aes(x=louvain, y=ssGSEA, fill=louvain)) + 
      geom_boxplot() + 
      scale_fill_manual(values = default_palette)

  g3 = ggplot(df, aes(x=louvain, y=positive, fill=louvain)) + 
      geom_boxplot() + 
    scale_color_manual(values = default_palette)

  g4 = ggplot(df, aes(x=louvain, y=negative, fill=louvain)) + 
      geom_boxplot() + 
    scale_color_manual(values = default_palette)

  ga1 = ggarrange(g2, g3, g4, ncol = 3, nrow = 1, legend="top", 
                  common.legend=TRUE)
  print(ga1)
  
  ssGSEA_med = tapply(df$ssGSEA, df$louvain, median)
  
  df2 = data.frame(louvain = as.factor(as.numeric(names(ssGSEA_med))), 
                   ssGSEA = ssGSEA_med, 
                   positive = tapply(df$positive, df$louvain, median), 
                   negative = tapply(df$negative, df$louvain, median))
  
  g5 = ggplot(df2, aes(x=ssGSEA, y=positive, color=louvain)) + 
      geom_point(size=3) + 
      scale_color_manual(values = default_palette)

  g6 = ggplot(df2, aes(x=positive, y=negative, color=louvain)) + 
      geom_point(size=3) + 
      scale_color_manual(values = default_palette)

  ga2 = ggarrange(g5, g6, ncol = 2, nrow = 1, legend="top", 
                  common.legend=TRUE)
  print(ga2)

  fnm = sprintf("../../data/Zheng_2021_processed/%s.txt", 
                gsub(".counts.txt.gz", "", f1))
  
  df$cell = colnames(sce)
  fwrite(df, file=fnm, sep="\t")
  system(sprintf("gzip -f %s", fnm))
  
  print(sprintf("Done with %s", f1))
  print(date())
  print("------------------------------------------------------------------")
}
## [1] "file: GSE156728_BC_10X.CD4.counts.txt.gz, dimension: (24148,3064)"
## [1] "2497/3064 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 6548 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
## 
##   1   2   3   4 
## 150  27   5   1 
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## [1] "Done with GSE156728_BC_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 17:14:09 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_BCL_10X.CD4.counts.txt.gz, dimension: (28855,4238)"
## [1] "3491/4238 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 11462 genes with constant expression
## values throuhgout the samples.

## [1] "the frequency of sigature genes:"
## 
##   1   2   3   4 
## 150  27   5   1 
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## [1] "Done with GSE156728_BCL_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 17:17:00 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_ESCA_10X.CD4.counts.txt.gz, dimension: (24148,12359)"
## [1] "9578/12359 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 3681 genes with constant expression
## values throuhgout the samples.

## [1] "the frequency of sigature genes:"
## 
##   1   2   3   4 
## 150  27   5   1 
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## [1] "Done with GSE156728_ESCA_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:22:43 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_MM_10X.CD4.counts.txt.gz, dimension: (28855,3646)"
## [1] "2911/3646 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 11374 genes with constant expression
## values throuhgout the samples.

## [1] "the frequency of sigature genes:"
## 
##   1   2   3   4 
## 150  27   5   1 
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## [1] "Done with GSE156728_MM_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:23:50 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_PACA_10X.CD4.counts.txt.gz, dimension: (24148,3904)"
## [1] "3167/3904 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 6126 genes with constant expression
## values throuhgout the samples.

## [1] "the frequency of sigature genes:"
## 
##   1   2   3   4 
## 150  27   5   1 
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## [1] "Done with GSE156728_PACA_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:24:46 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_RC_10X.CD4.counts.txt.gz, dimension: (24148,10106)"
## [1] "8444/10106 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 4874 genes with constant expression
## values throuhgout the samples.

## [1] "the frequency of sigature genes:"
## 
##   1   2   3   4 
## 150  27   5   1 
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## [1] "Done with GSE156728_RC_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:27:18 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_THCA_10X.CD4.counts.txt.gz, dimension: (24148,23509)"
## [1] "18514/23509 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 2980 genes with constant expression
## values throuhgout the samples.

## [1] "the frequency of sigature genes:"
## 
##   1   2   3   4 
## 150  27   5   1 
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## [1] "Done with GSE156728_THCA_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:33:15 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_UCEC_10X.CD4.counts.txt.gz, dimension: (24148,12730)"
## [1] "10147/12730 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 4112 genes with constant expression
## values throuhgout the samples.

## [1] "the frequency of sigature genes:"
## 
##   1   2   3   4 
## 150  27   5   1 
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

## [1] "Done with GSE156728_UCEC_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:38:20 2022"
## [1] "------------------------------------------------------------------"

Session information

gc()
##             used   (Mb) gc trigger  (Mb) limit (Mb)   max used    (Mb)
## Ncells   9373640  500.7   17113998   914         NA   17113998   914.0
## Vcells 632508139 4825.7 1825172888 13925      32768 2281466103 17406.3
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggcorrplot_0.1.3            Rtsne_0.15                 
##  [3] svd_0.5                     bluster_1.4.0              
##  [5] cluster_2.1.2               scran_1.22.1               
##  [7] scater_1.22.0               scuttle_1.4.0              
##  [9] SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0
## [11] Biobase_2.54.0              GenomicRanges_1.46.1       
## [13] GenomeInfoDb_1.30.0         IRanges_2.28.0             
## [15] S4Vectors_0.32.3            BiocGenerics_0.40.0        
## [17] MatrixGenerics_1.6.0        matrixStats_0.61.0         
## [19] GSVA_1.42.0                 readxl_1.3.1               
## [21] stringr_1.4.0               ggpubr_0.4.0               
## [23] ggplot2_3.3.5               Matrix_1.3-4               
## [25] data.table_1.14.2          
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.0           plyr_1.8.6               
##   [3] igraph_1.2.9              GSEABase_1.56.0          
##   [5] BiocParallel_1.28.2       digest_0.6.29            
##   [7] htmltools_0.5.2           viridis_0.6.2            
##   [9] fansi_0.5.0               magrittr_2.0.1           
##  [11] memoise_2.0.1             ScaledMatrix_1.2.0       
##  [13] limma_3.50.0              Biostrings_2.62.0        
##  [15] annotate_1.72.0           R.utils_2.11.0           
##  [17] colorspace_2.0-2          blob_1.2.2               
##  [19] ggrepel_0.9.1             xfun_0.28                
##  [21] dplyr_1.0.7               crayon_1.4.2             
##  [23] RCurl_1.98-1.5            jsonlite_1.7.2           
##  [25] graph_1.72.0              glue_1.5.1               
##  [27] gtable_0.3.0              zlibbioc_1.40.0          
##  [29] XVector_0.34.0            DelayedArray_0.20.0      
##  [31] car_3.0-12                BiocSingular_1.10.0      
##  [33] Rhdf5lib_1.16.0           HDF5Array_1.22.1         
##  [35] abind_1.4-5               scales_1.1.1             
##  [37] DBI_1.1.1                 edgeR_3.36.0             
##  [39] rstatix_0.7.0             Rcpp_1.0.7               
##  [41] viridisLite_0.4.0         xtable_1.8-4             
##  [43] dqrng_0.3.0               bit_4.0.4                
##  [45] rsvd_1.0.5                metapod_1.2.0            
##  [47] httr_1.4.2                FNN_1.1.3                
##  [49] ellipsis_0.3.2            pkgconfig_2.0.3          
##  [51] XML_3.99-0.8              R.methodsS3_1.8.1        
##  [53] farver_2.1.0              sass_0.4.0               
##  [55] uwot_0.1.10               locfit_1.5-9.4           
##  [57] utf8_1.2.2                tidyselect_1.1.1         
##  [59] labeling_0.4.2            rlang_0.4.12             
##  [61] reshape2_1.4.4            AnnotationDbi_1.56.2     
##  [63] munsell_0.5.0             cellranger_1.1.0         
##  [65] tools_4.1.2               cachem_1.0.6             
##  [67] cli_3.1.0                 generics_0.1.1           
##  [69] RSQLite_2.2.8             broom_0.7.10             
##  [71] evaluate_0.14             fastmap_1.1.0            
##  [73] yaml_2.2.1                knitr_1.36               
##  [75] bit64_4.0.5               purrr_0.3.4              
##  [77] KEGGREST_1.34.0           sparseMatrixStats_1.6.0  
##  [79] R.oo_1.24.0               compiler_4.1.2           
##  [81] rstudioapi_0.13           beeswarm_0.4.0           
##  [83] png_0.1-7                 ggsignif_0.6.3           
##  [85] tibble_3.1.6              statmod_1.4.36           
##  [87] bslib_0.3.1               stringi_1.7.6            
##  [89] highr_0.9                 RSpectra_0.16-0          
##  [91] lattice_0.20-45           vctrs_0.3.8              
##  [93] pillar_1.6.4              lifecycle_1.0.1          
##  [95] rhdf5filters_1.6.0        jquerylib_0.1.4          
##  [97] RcppAnnoy_0.0.19          BiocNeighbors_1.12.0     
##  [99] cowplot_1.1.1             bitops_1.0-7             
## [101] irlba_2.3.3               R6_2.5.1                 
## [103] gridExtra_2.3             vipor_0.4.5              
## [105] codetools_0.2-18          assertthat_0.2.1         
## [107] rhdf5_2.38.0              withr_2.4.3              
## [109] GenomeInfoDbData_1.2.7    parallel_4.1.2           
## [111] grid_4.1.2                beachmat_2.10.0          
## [113] tidyr_1.1.4               rmarkdown_2.11           
## [115] DelayedMatrixStats_1.16.0 carData_3.0-4            
## [117] ggbeeswarm_0.6.0